* Start from an empty session and switch off output paging
clear all
set more off

* ---------------------------------------------------------------
* 1. INPUT FOLDER: both CSV files below are read from this folder
* ---------------------------------------------------------------
local path "C:\Users\icatovic\Downloads"   // <--- UPDATE THIS LINE ONLY

* Read the first CSV and park it in a temporary file
import delimited using "`path'/probability_manual_first_data.csv", clear
tempfile manual_batch
save "`manual_batch'"

* Read the second CSV, then stack the parked first file underneath it
import delimited using "`path'/probability_qp_first_data.csv", clear
append using "`manual_batch'"

* Keep only rows whose workerid occurs exactly once; when a workerid
* repeats, every copy is removed (none is retained)
duplicates tag workerid, generate(tag)
keep if tag == 0
drop tag



* Build the string choice variables from the answer flags.
*   highprobability is filled from answerph1, answerph2, answerph3
*   lowprobability  is filled from answerpl1, answerpl2, answerpl3
* Flag 1 maps to "QP", flag 2 to "Manual", flag 3 to "Indifferent".
* Flags are applied in order 1, 2, 3, so if several flags equal "true"
* on the same row the highest-numbered one wins. Rows with no "true"
* flag keep the empty string.
local choice_names QP Manual Indifferent

gen highprobability = ""
gen lowprobability = ""

forvalues k = 1/3 {
    local choice : word `k' of `choice_names'
    replace highprobability = "`choice'" if answerph`k' == "true"
    replace lowprobability = "`choice'" if answerpl`k' == "true"
}

* Value label that fixes the display order: QP, Indifferent, Manual.
* ", replace" lets this section be re-run when the label already exists.
label define choice_lbl 1 "QP" 2 "Indifferent" 3 "Manual", replace

* Numeric versions of the two string choice variables.
* The codes must agree with choice_lbl (QP = 1, Indifferent = 2, Manual = 3);
* otherwise the label attached below would print the wrong category name
* next to each code. Rows whose string value is none of the three choices
* stay missing.
foreach v in highprobability lowprobability {
    gen byte `v'num = .
    replace `v'num = 1 if `v' == "QP"
    replace `v'num = 2 if `v' == "Indifferent"
    replace `v'num = 3 if `v' == "Manual"
    label values `v'num choice_lbl
}

* Variable labels (used as table and axis titles)
label variable highprobabilitynum "High Probability"
label variable lowprobabilitynum "Low Probability"


* Table 3: Tabulate with display order: QP, Indifferent, Manual
* Rows are lowprobabilitynum, columns are highprobabilitynum.
* The cell frequencies are also stored in matrix freqmat.
tabulate lowprobabilitynum highprobabilitynum, matcell(freqmat)

* Off-diagonal totals, counted directly from the data.
* freqmat is not indexed here because tabulate only creates rows and columns
* for categories that actually occur, so the matrix can be smaller than 3x3
* and its positions would then no longer line up with the codes.
*   above = cells above the diagonal (row code < column code)
*   below = cells below the diagonal (row code > column code)
* Observations missing either variable are left out, as in the table.
quietly count if lowprobabilitynum < highprobabilitynum & !missing(lowprobabilitynum, highprobabilitynum)
scalar above = r(N)

quietly count if lowprobabilitynum > highprobabilitynum & !missing(lowprobabilitynum, highprobabilitynum)
scalar below = r(N)

* Display the results.
* scalar() forces the scalar to be used even when the dataset holds a
* variable with the same name or matching abbreviation.
display "Below diagonal: " scalar(below)
display "Above diagonal: " scalar(above)

* Total number of off-diagonal observations (the binomial trials)
scalar total = scalar(below) + scalar(above)

* Two-sided binomial test with success probability 0.5:
* twice the upper-tail probability of observing max(below, above) or more
* successes in total trials. Doubling can exceed 1 when the split is even
* or close to even, so the result is capped at 1.
scalar p = min(1, 2 * binomialtail(scalar(total), max(scalar(below), scalar(above)), 0.5))

display "Two-sided p-value from binomial test: " %6.4f scalar(p)
